Introduction

Bayesian MLR: Differential analysis of the distribution of peripheral blood cells. Bayesian multinomial logistic regression model initialization and example application to a generated toy dataset.

Install the model_multinom.R function by cloning the github repository and load the function in R.

source('./model_multinom.R')

Usage

In this example, we generate a toy dataset and apply the Bayesian multinomial logistic regression model to the distributions of 10 randomly generated samples (labelled 5 younger and 5 older) with max 10000 counts with 3 cell types with set proportions that add up to 1.

Load libraries

library(tidyverse)
## Warning: replacing previous import 'vctrs::data_frame' by 'tibble::data_frame'
## when loading 'dplyr'
library(rjags)
library(coda)
library(hablar)
library(patchwork)

Generate toy dataset

set.seed(562)
# 5 younger samples with set probabilities (0.1, 0.2, 0.7) of 3 categories (i.e. cell types)
sim.data.Y <- t(rmultinom(5, size = 10000, prob = c(0.1,0.2,0.7)))
# 5 older samples with set probabilities (0.2, 0.3, 0.5) of 3 categories (i.e. cell types)
sim.data.O <- t(rmultinom(5, size = 10000, prob = c(0.2,0.3,0.5)))
#bind sample count together
sim.data <- rbind(sim.data.Y, sim.data.O)

#create an age variable with 3 younger samples and 3 older samples
age.group <- c(rep(1,5), rep(2,5))
#pull simulated data and age variable in jags data object
jags.data <- list(y = sim.data, #simulated counts
                  age.group = factor(age.group), #age group label (younger = 1, older = 2)
                  N.sample = nrow(sim.data), #number of samples
                  N.ct = ncol(sim.data), #number of cell types
                  N.total = rep(10000,10), #total number of counts per sample
                  N.age = 2, #number of age groups
                  sex = sample(1:2, 10, replace = TRUE) #sex label 
)
jags.data
## $y
##       [,1] [,2] [,3]
##  [1,] 1019 1993 6988
##  [2,]  973 2031 6996
##  [3,]  964 1994 7042
##  [4,] 1043 1977 6980
##  [5,] 1009 1991 7000
##  [6,] 1941 3008 5051
##  [7,] 2066 2909 5025
##  [8,] 2061 2906 5033
##  [9,] 2005 2902 5093
## [10,] 2052 2929 5019
## 
## $age.group
##  [1] 1 1 1 1 1 2 2 2 2 2
## Levels: 1 2
## 
## $N.sample
## [1] 10
## 
## $N.ct
## [1] 3
## 
## $N.total
##  [1] 10000 10000 10000 10000 10000 10000 10000 10000 10000 10000
## 
## $N.age
## [1] 2
## 
## $sex
##  [1] 1 1 1 1 1 2 2 1 1 2

Run Bayesian multinomial logistic regression model

#Run model with 500 burn-in iterations and 1000 total iterations
jags <- jags.model(textConnection(multinom.model),data=jags.data, n.adapt=500, inits = list(.RNG.name = "base::Wichmann-Hill", .RNG.seed = 5))
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 10
##    Unobserved stochastic nodes: 9
##    Total graph size: 132
## 
## Initializing model
#monitor predicted probabilities p
test <- coda.samples(jags, c('p'), n.adapt = 500, n.iter=1000)

Obtain predicted probabilities of cell types for each sample

#Get estimates of p[i,j] for sample i and category j
coda.summary <- summary(test) 
predicted.prob <- coda.summary$statistics %>% as_tibble() %>% select(Mean)
predicted.prob <- matrix(as.vector(predicted.prob$Mean), nrow = 10, ncol = 3)
predicted.prob
##            [,1]      [,2]      [,3]
##  [1,] 0.1001999 0.1995656 0.7002345
##  [2,] 0.1001999 0.1995656 0.7002345
##  [3,] 0.1001999 0.1995656 0.7002345
##  [4,] 0.1001999 0.1995656 0.7002345
##  [5,] 0.1001999 0.1995656 0.7002345
##  [6,] 0.2019357 0.2948881 0.5031762
##  [7,] 0.2019357 0.2948881 0.5031762
##  [8,] 0.2030306 0.2905812 0.5063882
##  [9,] 0.2030306 0.2905812 0.5063882
## [10,] 0.2019357 0.2948881 0.5031762
#predicted probabilities for each sample should add up to 1
apply(predicted.prob,1,sum)
##  [1] 1 1 1 1 1 1 1 1 1 1

Diagnostics for model convergence

Density and trace plots

plot(test)

Autocorrelation

autocorr.plot(test)

Geweke convergence diagnostic

geweke.diag(test)
## [[1]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##   p[1,1]   p[2,1]   p[3,1]   p[4,1]   p[5,1]   p[6,1]   p[7,1]   p[8,1] 
##  1.04621  1.04621  1.04621  1.04621  1.04621  0.06107  0.06107 -2.32453 
##   p[9,1]  p[10,1]   p[1,2]   p[2,2]   p[3,2]   p[4,2]   p[5,2]   p[6,2] 
## -2.32453  0.06107 -0.48937 -0.48937 -0.48937 -0.48937 -0.48937 -1.86740 
##   p[7,2]   p[8,2]   p[9,2]  p[10,2]   p[1,3]   p[2,3]   p[3,3]   p[4,3] 
## -1.86740  0.82126  0.82126 -1.86740 -0.10182 -0.10182 -0.10182 -0.10182 
##   p[5,3]   p[6,3]   p[7,3]   p[8,3]   p[9,3]  p[10,3] 
## -0.10182  1.68337  1.68337  0.73616  0.73616  1.68337